library(lubridate) # for handling date-times
library(ggplot2) # for visualizing
library(tidyr) # for data formatting
library(dplyr) # for data formatting
library(plotly)# for making interactive graphs

Chapter 2 Hydrology

## Chapter 2.1
####################################################

# read in streamflow data

# Package ID: knb-lter-hbr.2.11 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Daily Streamflow by Watershed, 1956 - present.
inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/2/11/1254d17cbd381556c05afa740d380e78" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")

dt1 <-read.csv(infile1,header=F 
               ,skip=1
               ,sep=","  
               ,quot='"' 
               , col.names=c(
                 "DATE",     
                 "WS",     
                 "Streamflow",     
                 "Flag"    ), check.names=TRUE)
unlink(infile1)

# view the dataset, 182,000 rows
str(dt1)
## 'data.frame':    182024 obs. of  4 variables:
##  $ DATE      : chr  "1956-01-01" "1956-01-02" "1956-01-03" "1956-01-04" ...
##  $ WS        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Streamflow: num  0.274 0.265 0.251 0.248 0.25 ...
##  $ Flag      : num  NA NA NA NA NA NA NA NA NA NA ...
# Notice that WS is numeric, and we want the watersheds to be factors
dt1$WS<-as.factor(dt1$WS)

# Provide columns interpreted as a Date by R
dt1$DATE<-ymd(dt1$DATE) # change how R interprets Date to be a date
dt1$Year<-year(dt1$DATE)   #add a column for 'year' from date
dt1$doy<-yday(dt1$DATE) # add day of year

# add in water year
w.year <- as.numeric(format(dt1$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt1$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1

dt1$wyear<-w.year

# add in 'water year station'. 
dt1$wys<-paste(dt1$wyear, dt1$WS)


# make sure you are only using complete years for the record
str.obs<-as.data.frame(table(dt1$WS, dt1$wyear))
str.obs$wys<- paste(str.obs$Var2, str.obs$Var1)
str.obs[str.obs$Freq<350, "Use"]<-"incomplete wyear" # complete is 350+ days
str.obs[is.na(str.obs$Use),"Use"]<-"complete"
#head(str.obs, 11)

# bring complete years from str.obs to dt1
dt1$Use<-str.obs$Use[match(dt1$wys, str.obs$wys)]
dt1.complete<-dt1[dt1$Use=="complete",]


# calculate the annual sum of streamflow measurements
annstr<-aggregate(list(Streamflow=dt1.complete$Streamflow), 
              by=list(WS=dt1.complete$WS, wyear=dt1.complete$wyear), FUN="sum")

# this makes it nicer for working with other HB datasets
annstr$WS<-sub("^","W",annstr$WS)

#  Make a streamflow graph
g1<-ggplot(annstr, aes(x=wyear, y=Streamflow, col=WS))+
  geom_point()+geom_line()+
  ylab("Steamflow (mm)")+xlab("Water year (June 1)")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p1<-ggplotly(g1)

p1 # is now a plotly object
## Chapter 2.2
####################################################

# read in preciptation data

# Package ID: knb-lter-hbr.14.14 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Total Daily Precipitation by Watershed, 1956 - present.
inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/14/14/c606bfe2f2deb3fa3eabf692ae15f02d" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")

dt2 <-read.csv(infile1,header=F 
               ,skip=1
               ,sep=","  
               , col.names=c(
                 "DATE",     
                 "watershed",     
                 "Precip"    ), check.names=TRUE)

unlink(infile1)

# conduct data formatting
str(dt2)
## 'data.frame':    183939 obs. of  3 variables:
##  $ DATE     : chr  "1956-01-01" "1956-01-02" "1956-01-03" "1956-01-04" ...
##  $ watershed: chr  "W1" "W1" "W1" "W1" ...
##  $ Precip   : num  0 0 8.6 0 0 0 0 18.8 8.1 6.3 ...
# We have a data column, but its not formatted as a date
dt2$DATE<-ymd(dt2$DATE) # change how R interprets Date to be a date
dt2$Year<-year(dt2$DATE)

# add in water year
w.year <- as.numeric(format(dt2$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt2$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1

dt2$wyear<-w.year # add water year as a column to precip dataset

# add in water year-station
dt2$wys<-paste(dt2$wyear, dt2$watershed)


# make sure you are only using complete years for the record
pre.obs<-as.data.frame(table(dt2$watershed , dt2$wyear))
pre.obs$wys<- paste(pre.obs$Var2, pre.obs$Var1)
pre.obs[pre.obs$Freq<350, "Use"]<-"incomplete wyear" # complete is 350+ days
pre.obs[is.na(pre.obs$Use),"Use"]<-"complete"
#head(pre.obs, 11)

dt2$Use<-pre.obs$Use[match(dt2$wys, pre.obs$wys)]
dt2.complete<-dt2[dt2$Use=="complete",]

# get annual sums
annpre<-aggregate(list(Precip=dt2.complete$Precip),
      by=list(WS=dt2.complete$watershed, wyear=dt2.complete$wyear), FUN="sum")

g2<-ggplot(annpre, aes(x=wyear, y=Precip, col=WS))+
  geom_point()+geom_line()+
  ylab("Precip (mm)")+xlab("Water year (June 1)")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p2<-ggplotly(g2)
p2
## Chapter 2.3
####################################################

# calculate Actual EvapoTranspiration (AET) #  Precip - Streamflow = AET

### give the watersheds and years a unique ID. watershed-year, wsy
annstr$wsy<-paste(annstr$WS, annstr$wyear)
annpre$wsy<-paste(annpre$WS, annpre$wyear)

# bring in precip data to streamflow object
annstr$Precip<-annpre$Precip[match(annstr$wsy, annpre$wsy)]

# calculate AET, assuming no change in storage
annstr$AET<-annstr$Precip - annstr$Streamflow


g3<-ggplot(annstr, aes(x=wyear, y=AET, col=WS))+
  geom_point()+geom_line()+
  ylab("AET (mm)")+xlab("Water year (June 1st)")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

p3<-ggplotly(g3)
p3
## Chapter 2.4
####################################################

# # read in air temperature data,  compare AET with Average annual air temp

# Package ID: knb-lter-hbr.59.10 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Daily Temperature Record, 1955 - present.
inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/59/10/9723086870f14b48409869f6c06d6aa8" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")


dt3 <-read.csv(infile1,header=F 
               ,skip=1
               ,sep=","  
               , col.names=c(
                 "date",     
                 "STA",     
                 "MAX",     
                 "MIN",     
                 "AVE",     
                 "Flag"    ), check.names=TRUE)

unlink(infile1)

# We have a data column, but its not formatted as a date
dt3$DATE<-ymd(dt3$date) # change how R interprets Date to be a date
dt3$Year<-year(dt3$DATE)

# add in water year
w.year <- as.numeric(format(dt3$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt3$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1

dt3$wyear<-w.year # add water year as a column to precip dataset
dt3$month<-month(dt3$date)

# add in water year-station
dt3$wys<-paste(dt3$wyear, dt3$STA)


# make sure you are only using complete years for the record
pre.obs<-as.data.frame(table(dt3$STA , dt3$wyear))
pre.obs$wys<- paste(pre.obs$Var2, pre.obs$Var1)
pre.obs[pre.obs$Freq<350, "Use"]<-"incomplete wyear" # complete is 350 or more days
pre.obs[is.na(pre.obs$Use),"Use"]<-"complete"


dt3$Use<-pre.obs$Use[match(dt3$wys, pre.obs$wys)]

dt3.complete<-dt3[dt3$Use=="complete",]

# get annual averages of air
growseas<-dt3.complete[dt3.complete$month >="6" & dt3.complete$month <=9 ,]
annairgrow<-aggregate(list(avtemp=growseas$AVE), by=list( wyear=growseas$wyear), FUN="mean")


#visualize to become familiar with data
gan<-ggplot(annairgrow, aes(x=wyear, y=avtemp))+
  geom_point()+geom_line()+
  ylab("Growing season air temperature (C)")+xlab("Water year (June 1st)")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())

ggplotly(gan)
# view trend from 1995-2010 of growing season temp and AET

### bring in av temp to annstr
annstr$avtemp<-annairgrow$avtemp[match(annstr$wyear, annairgrow$wyear)]

#subset to 1995-2010
temp_aet<-annstr[annstr$wyear >= "1995" & annstr$wyear <="2010" ,]


ch2<-ggplot(temp_aet, aes(x=avtemp, y=AET, col=WS))+geom_point()+
  geom_smooth(method="lm", se=F)+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Average growing season temperature (C) June 1 - Sep 30")+
  ylab("Actual Evapotranspiration")


pch2<-ggplotly(ch2)
pch2

vegetation uses more water in a warming climate.

## Chapter 2.5
####################################################

# Forest cutting to increase water supply?
# WS5 clearcut in whole tree harvest 1984-1985.

# separate out WS5
WS5<-annstr[annstr$WS=="W5",]  

# subset years for periof of time in question
harvest<-WS5[WS5$wyear >= "1970" & WS5$wyear <="1994" ,]

# Use precip and stream discharge to evaluate effect on water yield and AET for 1983 (pre) - 1986 (post clear cut)
harvest$wyear<-as.integer(harvest$wyear)

gharv<-ggplot(harvest, aes(x=wyear, y=AET))+geom_point(col="forest green")+
  geom_vline(xintercept=1983, linetype="dashed", col="red")+
  geom_vline(xintercept=1985, linetype="solid", col="red")+
  geom_line(col="forest green")+scale_x_continuous(limits = c(1980, 1994), breaks = seq(1980, 1994, 5))+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Water year (June 1)")+
  ylab("Actual Evapotranspiration (mm)")+
  ggtitle("Hubbard Brook Watershed 5: Whole tree harvest 1983-1985")
pharv<-ggplotly(gharv)
pharv

Results from the WS5 experiment suggest that any increased water supply for human uses that may happen with forest cutting is temporary, that 3 years after harvest the vegetation uses the same amount of water.

## Chapter 3.1
####################################################

# read in monthly fluxes for Chemistry of streamwater data for WS6
# Package ID: knb-lter-hbr.8.17 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Chemistry of Streamwater – Monthly Fluxes, Watershed 6, 1963 - present.
inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/8/17/3312389e77cc5fd06bc8a7c9019de0ed" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")

                   
 dt4 <-read.csv(infile1,header=F 
          ,skip=1
            ,sep=","  
                ,quot='"' 
        , col.names=c(
                    "Year",     
                    "Month",     
                    "Year_Month",     
                    "flow_mm",     
                    "Ca_flux",     
                    "Mg_flux",     
                    "K_flux",     
                    "Na_flux",     
                    "Al_Ferron_flux",     
                    "TMAl_flux",     
                    "OMAl_flux",     
                    "Al_ICP_flux",     
                    "NH4_flux",     
                    "SO4_flux",     
                    "NO3_flux",     
                    "Cl_flux",     
                    "PO4_flux",     
                    "DOC_flux",     
                    "TDN_flux",     
                    "DON_flux",     
                    "DIC_flux",     
                    "SiO2_flux",     
                    "Mn_flux",     
                    "Fe_flux",     
                    "F_flux",     
                    "H_flux",     
                    "pH_volwt",     
                    "SpecCond_volwt",     
                    "ANC_volwt"    ), check.names=TRUE)
               
unlink(infile1)
            
# Fix any interval or ratio columns mistakenly read in as nominal and nominal columns read as numeric or dates read as strings
if (class(dt4$Month)!="factor") dt4$Month<- as.factor(dt4$Month)
if (class(dt4$Year_Month)!="factor") dt4$Year_Month<- as.factor(dt4$Year_Month)
if (class(dt4$flow_mm)=="factor") dt4$flow_mm <-as.numeric(levels(dt4$flow_mm))[as.integer(dt4$flow_mm) ]               
if (class(dt4$flow_mm)=="character") dt4$flow_mm <-as.numeric(dt4$flow_mm)
if (class(dt4$Ca_flux)=="factor") dt4$Ca_flux <-as.numeric(levels(dt4$Ca_flux))[as.integer(dt4$Ca_flux) ]               
if (class(dt4$Ca_flux)=="character") dt4$Ca_flux <-as.numeric(dt4$Ca_flux)

# Convert Missing Values to NA for non-dates
dt4$flow_mm <- ifelse((trimws(as.character(dt4$flow_mm))==trimws("-888.88")),NA,dt4$flow_mm)               
suppressWarnings(dt4$flow_mm <- ifelse(!is.na(as.numeric("-888.88")) & (trimws(as.character(dt4$flow_mm))==as.character(as.numeric("-888.88"))),NA,dt4$flow_mm))
dt4$Ca_flux <- ifelse((trimws(as.character(dt4$Ca_flux))==trimws("-888.88")),NA,dt4$Ca_flux)               
suppressWarnings(dt4$Ca_flux <- ifelse(!is.na(as.numeric("-888.88")) & (trimws(as.character(dt4$Ca_flux))==as.character(as.numeric("-888.88"))),NA,dt4$Ca_flux))


# conduct data formatting
dt4$DATE<-paste0(dt4$Year_Month,"-01") 
dt4$DATE<-ymd(dt4$DATE) # change how R interprets Date to be a date
# add in water year
w.year <- as.numeric(format(dt4$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt4$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1
dt4$wyear<-w.year

# make sure you are only using complete years for the record
monchem<-as.data.frame(table( dt4$wyear))
monchem$wys<- paste(monchem$Var1)
monchem[monchem$Freq<12, "Use"]<-"incomplete wyear" # incomplete is less then 12 months
monchem[is.na(monchem$Use),"Use"]<-"complete"


dt4$Use<-monchem$Use[match(dt4$wyear, monchem$wys)]

dt4.complete<-dt4[dt4$Use=="complete",]

# Sum the 12 months to get annual totals
## Ca is in g/ha
annCa_str_WS6<-aggregate(list(Ca.g.ha=dt4.complete$Ca_flux), by=list(wyear=dt4.complete$wyear), FUN="sum")

# has Ca flux from WS6 changed since the 1960s?
gca1<-ggplot(annCa_str_WS6, aes(x=wyear, y=Ca.g.ha))+geom_point()+geom_line()+
  ggtitle("Annual export of Ca in streamwater from WS6 has decreased")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Water year (June 1)")+
  ylab("Ca (g/ha)")
pca1<-ggplotly(gca1)
pca1

Ca flux from WS6 has decreased since the 1960s

     ## Chapter 3.2
####################################################
## read in monthly flux of stream chemistry for WS2
# Package ID: knb-lter-hbr.4.17 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Chemistry of Streamwater – Monthly Fluxes, Watershed 2, 1963 - present.
# Data set creator:    - Hubbard Brook Watershed Ecosystem Record (HBWatER) 
# Contact:    -  Hubbard Brook Ecosystem Study  - hbr-im@lternet.edu
# Stylesheet v2.11 for metadata conversion into program: John H. Porter, Univ. Virginia, jporter@virginia.edu 

inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/4/17/a6aeef15070be913ee2f06f431b9b7a7" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")

                   
 dt5 <-read.csv(infile1,header=F 
          ,skip=1
            ,sep=","  
                ,quot='"' 
        , col.names=c(
                    "Year",     
                    "Month",     
                    "Year_Month",     
                    "flow_mm",     
                    "Ca_flux",     
                    "Mg_flux",     
                    "K_flux",     
                    "Na_flux",     
                    "Al_Ferron_flux",     
                    "TMAl_flux",     
                    "OMAl_flux",     
                    "Al_ICP_flux",     
                    "NH4_flux",     
                    "SO4_flux",     
                    "NO3_flux",     
                    "Cl_flux",     
                    "PO4_flux",     
                    "DOC_flux",     
                    "TDN_flux",     
                    "DON_flux",     
                    "DIC_flux",     
                    "SiO2_flux",     
                    "Mn_flux",     
                    "Fe_flux",     
                    "F_flux",     
                    "H_flux",     
                    "pH_volwt",     
                    "SpecCond_volwt",     
                    "ANC_volwt"    ), check.names=TRUE)
               
unlink(infile1)
            
# Fix any interval or ratio columns mistakenly read in as nominal and nominal columns read as numeric or dates read as strings
                
if (class(dt5$Month)!="factor") dt5$Month<- as.factor(dt5$Month)
if (class(dt5$Year_Month)!="factor") dt5$Year_Month<- as.factor(dt5$Year_Month)
if (class(dt5$flow_mm)=="factor") dt5$flow_mm <-as.numeric(levels(dt5$flow_mm))[as.integer(dt5$flow_mm) ]               
if (class(dt5$flow_mm)=="character") dt5$flow_mm <-as.numeric(dt5$flow_mm)
if (class(dt5$Ca_flux)=="factor") dt5$Ca_flux <-as.numeric(levels(dt5$Ca_flux))[as.integer(dt5$Ca_flux) ]               
if (class(dt5$Ca_flux)=="character") dt5$Ca_flux <-as.numeric(dt5$Ca_flux)

# Convert Missing Values to NA for non-dates
dt5$flow_mm <- ifelse((trimws(as.character(dt5$flow_mm))==trimws("-888.88")),NA,dt5$flow_mm)               
suppressWarnings(dt5$flow_mm <- ifelse(!is.na(as.numeric("-888.88")) & (trimws(as.character(dt5$flow_mm))==as.character(as.numeric("-888.88"))),NA,dt5$flow_mm))
dt5$Ca_flux <- ifelse((trimws(as.character(dt5$Ca_flux))==trimws("-888.88")),NA,dt5$Ca_flux)               
suppressWarnings(dt5$Ca_flux <- ifelse(!is.na(as.numeric("-888.88")) & (trimws(as.character(dt5$Ca_flux))==as.character(as.numeric("-888.88"))),NA,dt5$Ca_flux))

# Here is the structure of the input data frame:
#str(dt5)                            

# conduct data formatting
#head(dt5)
dt5$DATE<-paste0(dt5$Year_Month,"-01") 
dt5$DATE<-ymd(dt5$DATE) # change how R interprets Date to be a date
# add in water year
w.year <- as.numeric(format(dt5$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt5$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1
dt5$wyear<-w.year

# make sure you are only using complete years for the record
monchem<-as.data.frame(table( dt5$wyear))

monchem$wys<- paste(monchem$Var1)
monchem[monchem$Freq<12, "Use"]<-"incomplete wyear" # incomplete is less then 12 months
monchem[is.na(monchem$Use),"Use"]<-"complete"
#head(monchem, 50)

dt5$Use<-monchem$Use[match(dt5$wyear, monchem$wys)]

dt5.complete<-dt5[dt5$Use=="complete",]

head(dt5)
##   Year Month Year_Month flow_mm    Ca_flux   Mg_flux   K_flux   Na_flux
## 1 1963     6    1963-06   7.271  175.86860  25.44850   4.3626  76.25590
## 2 1963     7    1963-07   0.994   23.83335   3.59660   0.8612  10.58595
## 3 1963     8    1963-08   3.084   81.17155  11.21325   3.4519  25.28780
## 4 1963     9    1963-09   1.000   25.03930   3.78665   3.5531  11.09735
## 5 1963    10    1963-10   0.245    6.14950   1.21275   4.3855   2.95225
## 6 1963    11    1963-11 100.925 2333.73610 397.70430 404.4160 814.49295
##   Al_Ferron_flux TMAl_flux OMAl_flux Al_ICP_flux NH4_flux SO4_flux NO3_flux
## 1        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
## 2        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
## 3        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
## 4        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
## 5        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
## 6        -888.88   -888.88   -888.88     -888.88  -888.88  -888.88  -888.88
##   Cl_flux PO4_flux DOC_flux TDN_flux DON_flux DIC_flux SiO2_flux Mn_flux
## 1 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
## 2 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
## 3 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
## 4 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
## 5 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
## 6 -888.88  -888.88  -888.88  -888.88  -888.88  -888.88   -888.88 -888.88
##   Fe_flux  F_flux  H_flux pH_volwt SpecCond_volwt ANC_volwt       DATE wyear
## 1 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-06-01  1963
## 2 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-07-01  1963
## 3 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-08-01  1963
## 4 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-09-01  1963
## 5 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-10-01  1963
## 6 -888.88 -888.88 -888.88  -888.88        -888.88   -888.88 1963-11-01  1963
##        Use
## 1 complete
## 2 complete
## 3 complete
## 4 complete
## 5 complete
## 6 complete
# Sum the 12 months to get annual totals
## Ca is in g/ha
annCa_str_WS2<-aggregate(list(Ca.g.ha=dt5.complete$Ca_flux), by=list(wyear=dt5.complete$wyear), FUN="sum")
#head(annCa_str_WS6)


# Combine WS6 and WS2
annCa_str_WS2$Watershed<-"WS2"
annCa_str_WS6$Watershed<-"WS6"


annCa_str_26<-rbind(annCa_str_WS6,annCa_str_WS2)

# has Ca flux from WS6 changed since the 1960s?
gca2<-ggplot(annCa_str_26, aes(x=wyear, y=Ca.g.ha, col=Watershed))+geom_point()+geom_line(size=1)+
  ggtitle("WS2 had higher streamwater Ca compared to reference WS6 after devegetation ")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Water year (June 1)")+
  ylab("Ca (g/ha)")+
  geom_vline(xintercept=1965, linetype="dashed", col="red")+
  geom_vline(xintercept=1968, linetype="solid", col="red")+
  scale_y_log10()

pca2<-ggplotly(gca2)
pca2

Chapter 4 Element input in bulk precip

## Chapter 4.1
####################################################

# read in chemistry of precipitation for W6

# Package ID: knb-lter-hbr.20.11 Cataloging System:https://pasta.edirepository.org.
# Data set title: Hubbard Brook Experimental Forest: Chemistry of Precipitation  Monthly Fluxes, Watershed 6, 1964 - present.

inUrl1  <- "https://pasta.lternet.edu/package/data/eml/knb-lter-hbr/20/11/76b46d5bd60e4912406726ec71610d0a" 
infile1 <- tempfile()
try(download.file(inUrl1,infile1,method="curl"))
if (is.na(file.size(infile1))) download.file(inUrl1,infile1,method="auto")


dt6 <-read.csv(infile1,header=F 
               ,skip=1
               ,sep=","  
               ,quot='"' 
               , col.names=c(
                 "Year",     
                 "Month",     
                 "Year_Month",     
                 "precip_mm",     
                 "Ca_flux",     
                 "Mg_flux",     
                 "K_flux",     
                 "Na_flux",     
                 "Al_Ferron_flux",     
                 "TMAl_flux",     
                 "OMAl_flux",     
                 "Al_ICP_flux",     
                 "NH4_flux",     
                 "SO4_flux",     
                 "NO3_flux",     
                 "Cl_flux",     
                 "PO4_flux",     
                 "DOC_flux",     
                 "TDN_flux",     
                 "DON_flux",     
                 "SiO2_flux",     
                 "Mn_flux",     
                 "Fe_flux",     
                 "F_flux",     
                 "H_flux",     
                 "pH_volwt",     
                 "SpecCond_volwt"    ), check.names=TRUE)

unlink(infile1)


# Fix any interval or ratio columns mistakenly read in as nominal and nominal columns read as numeric or dates read as strings
if (class(dt6$Month)!="factor") dt6$Month<- as.factor(dt6$Month)
if (class(dt6$Year_Month)!="factor") dt6$Year_Month<- as.factor(dt6$Year_Month)
if (class(dt6$precip_mm)=="factor") dt6$precip_mm <-as.numeric(levels(dt6$precip_mm))[as.integer(dt6$precip_mm) ]               
if (class(dt6$precip_mm)=="character") dt6$precip_mm <-as.numeric(dt6$precip_mm)
if (class(dt6$Ca_flux)=="factor") dt6$Ca_flux <-as.numeric(levels(dt6$Ca_flux))[as.integer(dt6$Ca_flux) ]               
if (class(dt6$Ca_flux)=="character") dt6$Ca_flux <-as.numeric(dt6$Ca_flux)

# Convert Missing Values to NA for non-dates
dt6$precip_mm <- ifelse((trimws(as.character(dt6$precip_mm))==trimws("-888.88")),NA,dt6$precip_mm)               
dt6$Ca_flux <- ifelse((trimws(as.character(dt6$Ca_flux))==trimws("-888.88")),NA,dt6$Ca_flux)               


# add in date
dt6$DATE<-paste0(dt6$Year_Month,"-01")  # in in day to year month string
dt6$DATE<-ymd(dt6$DATE) # change how R interprets Date to be a date

# add in water year
w.year <- as.numeric(format(dt6$DATE, "%Y"))
june.july.sept <- as.numeric(format(dt6$DATE, "%m")) < 6
w.year[june.july.sept] <- w.year[june.july.sept] - 1
dt6$wyear<-w.year


# make sure you are only using complete years for the record
monchem<-as.data.frame(table( dt6$wyear))


monchem$wys<- paste(monchem$Var1)
monchem[monchem$Freq<10, "Use"]<-"incomplete wyear" # incomplete is less then 12 months
monchem[is.na(monchem$Use),"Use"]<-"complete"


dt6$Use<-monchem$Use[match(dt6$wyear, monchem$wys)]

dt6.complete<-dt6[dt6$Use=="complete",]

# calculate annual input of Ca into WS6
# Sum the 12 months to get annual totals
## Ca is in g/ha

annCa_precip_WS6<-aggregate(list(Ca.g.ha=dt6.complete$Ca_flux, precip_mm=dt6.complete$precip_mm), by=list(wyear=dt6.complete$wyear), FUN="sum")

annCa_precip_WS6$Watershed<-"WS6"  # add watershed ID

annCa_precip_WS6$stream_Ca.g.ha<-annCa_str_WS6$Ca.g.ha[match(annCa_precip_WS6$wyear, annCa_str_WS6$wyear)]

ginputca1<-ggplot(annCa_precip_WS6, aes(x=wyear, y=Ca.g.ha))+geom_point()+geom_line()+
  ggtitle("Annual input of Ca by precipitation for WS6 has decreased")+xlim(1964,2020)+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Water year (June 1)")+
  ylab("Ca (g / ha)")

pinputca1<-ggplotly(ginputca1)
pinputca1

Chapter 5 Nutrient accumulation

## Chapter 5.1
####################################################

# read in vegetation data for WS6 (received from Mary Martin 2022-06-29 by email)
dt7<-read.csv("C:\\Users\\bears\\Downloads\\HubbardBrookWS6_Tree_Biomass_1997.csv")

# Here is the structure of the input data frame:
str(dt7)       
## 'data.frame':    11421 obs. of  9 variables:
##  $ WS       : int  6 6 6 6 6 6 6 6 6 6 ...
##  $ Year     : int  1997 1997 1997 1997 1997 1997 1997 1997 1997 1997 ...
##  $ Plot     : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ SppCode  : chr  "AB" "BF" "BF" "BF" ...
##  $ SppName  : chr  "Beech" "Balsam fir" "Balsam fir" "Balsam fir" ...
##  $ DBH.cm   : num  6.2 8.2 6.2 4.9 2.9 2.1 3.1 4.5 8.5 2.7 ...
##  $ LIVE.DEAD: chr  "LIVE" "LIVE" "LIVE" "LIVE" ...
##  $ ABOVE.kg : num  4.9 10.8 5.2 2.8 0.7 0.4 1.1 2.8 9.2 0.4 ...
##  $ BELOW.kg : num  2.1 2.9 1.3 0.7 0.1 0.1 0.2 0.7 4.2 0.2 ...
tr<-aggregate( list(ABOVE.kg=dt7$ABOVE.kg , BELOW.kg=dt7$BELOW.kg), by=list(Plot=dt7$Plot, Species=dt7$SppName), FUN="sum", na.rm=T)
dim(tr)
## [1] 971   4
head(tr)
##   Plot    Species ABOVE.kg BELOW.kg
## 1    1 Balsam fir   1168.7    420.1
## 2    2 Balsam fir    843.8    312.1
## 3    3 Balsam fir    871.5    311.6
## 4    4 Balsam fir    570.2    206.3
## 5    5 Balsam fir    400.7    167.1
## 6    6 Balsam fir    404.7    149.3
biosp<-ggplot(tr, aes(x=BELOW.kg, y=ABOVE.kg, col=Species))+geom_point()+
   theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Belowground biomass (kg)")+
  ylab("Aboveground biomass (kg")
ggplotly(biosp)
# gather above and below to make summing trees found in the watershed easier
trg<-gather(tr, "abe","value", 3:4)
#head(trg)

sum(trg$value) # this is the biomass of the entire region?
## [1] 2969392
## I see a sum of 2,969,392 kg of biomass in W6 in 1997

## divide the biomass total by the watershed area to get a kg/ha value
w6biomass.kg.ha<-sum(trg$value) / 13.2  # kg of biomass per hectare


w6biomass.g.m2<- w6biomass.kg.ha * 1000 / 10000
w6biomass.g.m2
## [1] 22495.4
## I see a sum of 22,495 g/m2 of biomass in 1997


# The chapter says the value was 16,107 g/m2 in 1964

# biomass was higher in 1997 compared to 64.


# for every gram of tree, there is 3.13 mg of Ca,  based on dry weight.
w6ca.g.m2<-w6biomass.g.m2 * 0.00313 # grams Ca per gram of wood
w6ca.g.m2
## [1] 70.41059
# I see 70.4 mg/g of Ca in the biomass

The principal source of Ca for the regrowth of biomass in WS6 was from the soil

## Chapter 5.2
####################################################

# how does this value of Ca in the vegetation from 1965 differ from 1997?

# averaged 50.7 grams of Ca /m2 in 1965.
# is 70.4 g Ca /m2 in 97.

(70.4-50.7 ) / 50.7 # 39% increase in Ca?
## [1] 0.3885602

Ca in biomass: 51 g/m2 in 65, 70 g/m2 in 97?

Chapter 6 Net soil release

## Chapter 6.1
####################################################

#  We've previously generated this dataframe, 
# annCa_precip_WS6  # has annual Ca input from precip, to annual Ca coming out in the water.


# this is to get net Ca soil release.


## weathering input = stream output - precip input +- storage
annCa_precip_WS6$outin<- annCa_precip_WS6$stream_Ca.g.ha - annCa_precip_WS6$Ca.g.ha

strpre<-ggplot(annCa_precip_WS6, aes(x=stream_Ca.g.ha, y=Ca.g.ha, col=wyear, group=wyear))+geom_point()+
  ggtitle(" ")+
  theme_bw()+theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
  xlab("Export of Ca by Streamwater (g/ha/year)")+
  ylab("Input pf Ca by Precipitation (g/ha/year)")+labs(col='Water year (June 1)')
stpe<-ggplotly(strpre)
stpe
w6ca_str_65<-annCa_precip_WS6[annCa_precip_WS6$wyear=="1965","stream_Ca.g.ha"]
w6ca_str_97<-annCa_precip_WS6[annCa_precip_WS6$wyear=="1997","stream_Ca.g.ha"]

w6ca_pre_65<-annCa_precip_WS6[annCa_precip_WS6$wyear=="1965","Ca.g.ha"]
w6ca_pre_97<-annCa_precip_WS6[annCa_precip_WS6$wyear=="1997","Ca.g.ha"]



# compare to Ca g/m2  , previously calculated as w6ca.g.m2
w6ca_wood.g.ha_65<- 50.7 * 10000
w6ca_wood.g.ha_97<-w6ca.g.m2 * 10000


hbw6<-data.frame( Year=c("1965","1997","1965","1997","1965","1997" ),
                Pool=c("stream export","stream export","precip input","precip input","biomass","biomass" ),
                value=c(w6ca_str_65,w6ca_str_97, w6ca_pre_65, w6ca_pre_97, w6ca_wood.g.ha_65, w6ca_wood.g.ha_97 ))

hbw6$Pool<-factor(hbw6$Pool, levels=c("precip input","stream export","biomass"))

hbw6$value.kg<-hbw6$value/1000 # g to kg

ggplot(hbw6, aes(x=Pool, y=value.kg, fill=Year))+geom_bar(stat="identity",position="dodge", col="black")+
   theme_classic()+ylab("Ca (kg / ha / year)")+
  scale_y_log10()+labs(col='Water year (June 1)')

independent estimate of mineral weathering of Ca is 2.8 kg/ha/year. What does this estimate imply about the aboveground